load("C:/Users/nikos/Desktop/SAE_proj_data.RData")
library(microbenchmark)
library(parallel)
library(mvtnorm)
# --------------------------------------------------------------------------------------------#
# ------------------------------- calculating the sd of y ------------------------------------#
# --------------------------------------------------------------------------------------------#
# Alternative 0 ###############################################################################
# loop: for every predicted y_hat, calculate std of y_hat and put it in a vector
varsofy0 <- function(n_obs_census1, cov_coefficients1, x_census1){
var_y0 <- rep(NA, n_obs_census1)
for(i in 1:n_obs){
var_y0[i] <- t(x_census1[i,]) %*% cov_coefficients1 %*% x_census[i,]
}
return(var_y0)
}
# Alternative 1 ###############################################################################
# loop, but matrix is transposed outside the loop
varsofy1 <- function(n_obs_census1, cov_coefficients1, x_census1){
tx_census1 <- t(x_census1)
var_y1 <- rep(NA, n_obs_census1)
for(i in 1:n_obs){
var_y1[i] <- tx_census1[,i] %*% cov_coefficients1 %*% x_census[i,]
}
return(var_y1)
}
# Alternative 2 ###############################################################################
# with apply function
varsofy2 <- function(n_obs_census1, cov_coefficients1, x_census1){
var_y2 <- rep(NA, n_obs_census1)
var_y2 <- apply(X = x_census1, MARGIN = 1, FUN = function(x){t(x) %*% cov_coefficients1 %*% x})
return(var_y2)
}
# Alternative 3 ###############################################################################
# with apply and parallelisation
varsofy3 <- function(n_obs_census1, cov_coefficients1, x_census1){
var_y3 <- rep(NA, n_obs_census1)
number_of_cores <- parallel::detectCores() - 1 # calculate number of cores to use
cl <- makeCluster(number_of_cores) # initiate clusters for parallelisation
parallel::clusterExport(cl, c("var_y3", "x_census", "cov_coefficients"))
a <- Sys.time()
var_y3 <- parallel::parApply(cl, X = x_census, MARGIN = 1, FUN = function(x){t(x) %*% cov_coefficients %*% x})
b <- Sys.time()
parallel::stopCluster(cl)
return(a-b)
}
varsofy3(n_obs_census1 = n_obs_census, cov_coefficients1 = cov_coefficients, x_census1 = x_census)
# nicht mal die alleinige Anwendung der Parallelisierung scheint einen sinnvollen Unterschied zu machen
# Alternative 4 ###############################################################################
# with C++
mbm <- microbenchmark::microbenchmark(
alt1 = varsofy1(n_obs_census1 = n_obs_census, cov_coefficients1 = cov_coefficients, x_census1 = x_census),
alt2 = varsofy2(n_obs_census1 = n_obs_census, cov_coefficients1 = cov_coefficients, x_census1 = x_census),
alt3 = varsofy3(n_obs_census1 = n_obs_census, cov_coefficients1 = cov_coefficients, x_census1 = x_census),
times = 1)
mbm
mbm <- microbenchmark::microbenchmark(
alt0 = varsofy0(n_obs_census1 = n_obs_census, cov_coefficients1 = cov_coefficients, x_census1 = x_census),
alt1 = varsofy1(n_obs_census1 = n_obs_census, cov_coefficients1 = cov_coefficients, x_census1 = x_census),
times = 3)
mbm
# --------------------------------------------------------------------------------------------#
# -------------------------- drawing the normal distr. sample --------------------------------#
# --------------------------------------------------------------------------------------------#
xbeta <- predict(inference_survey$model_fit_surv, newdata = censusdata)
# Alternative 1 ###############################################################################
# using univariate normal distribution and loop
y_random1 <- function(n_obs_census1, n_boot1 = 5, xbeta1, sd_y1){
y_bootstrap <- matrix(NA, nrow = n_obs_census1, ncol = n_boot1)
for (i in 1:n_obs_census1){
y_bootstrap[i,] <- rnorm(n = n_boot1, mean = xbeta1[i], sd = sd_y1[i])
}
}
# Alternative 2 ###############################################################################
# using univariate normal distribution and apply
y_random2 <- function(n_obs_census1, n_boot1 = 5, xbeta1, sd_y1){
y_bootstrap <- matrix(NA, nrow = n_obs_census1, ncol = n_boot1)
m <- cbind(xbeta1, sd_y1)
y_bootstrap <- apply(X = m, MARGIN = 1,
FUN = function(x, n_boot2){rnorm(n = n_boot1, mean = x[1], sd = x[2])},
n_boot2 = n_boot1)
}
# all(y_random1(n_obs_census1 = n_obs_census, n_boot1 = 100, xbeta1 = xbeta, sd_y1 = sd_y) == y_random2(n_obs_census1 = n_obs_census, n_boot1 = 100, xbeta1 = xbeta, sd_y1 = sd_y))
# Ergebnis ist gleich. Microbenchmarking: Alt1 leicht besser als Alt2
# Alternative 3 ###############################################################################
# using multivariate normal distribution and loop
y_random1 <- function(n_obs_census1, n_boot1 = 5, xbeta1, sd_y1){
y_bootstrap <- matrix(NA, nrow = n_obs_census1, ncol = n_boot1)
for (i in 1:n_obs_census1){
y_bootstrap[i,] <- rnorm(n = n_boot1, mean = xbeta1[i], sd = sd_y1[i])
}
}
mbm <- microbenchmark::microbenchmark(
alt1 = y_random1(n_obs_census1 = n_obs_census, n_boot1 = 100, xbeta1 = xbeta, sd_y1 = sd_y),
alt2 = y_random2(n_obs_census1 = n_obs_census, n_boot1 = 100, xbeta1 = xbeta, sd_y1 = sd_y),
times = 1)
mbm
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.